import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import sqlite3 as sq3
import plotly.offline as pyo
#Q1
df= pd.read_csv("https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv")
df.head(4)
| Unnamed: 0 | rawid | roi | volume | min | max | mean | std | type | level | id | icv | tbv | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | kirby127a_3_1_ax.img | Telencephalon_L | 531111 | 0.0 | 374.0 | 128.3013 | 51.8593 | 1 | 1 | 127 | 1378295 | 1268519 |
| 1 | 2 | kirby127a_3_1_ax.img | Telencephalon_R | 543404 | 0.0 | 300.0 | 135.0683 | 53.6471 | 1 | 1 | 127 | 1378295 | 1268519 |
| 2 | 3 | kirby127a_3_1_ax.img | Diencephalon_L | 9683 | 15.0 | 295.0 | 193.5488 | 32.2733 | 1 | 1 | 127 | 1378295 | 1268519 |
| 3 | 4 | kirby127a_3_1_ax.img | Diencephalon_R | 9678 | 10.0 | 335.0 | 193.7051 | 32.7869 | 1 | 1 | 127 | 1378295 | 1268519 |
df = df.loc[(df.type == 1) & (df.rawid== "kirby127a_3_1_ax.img")]
df = df.assign(comp = df.volume / np.sum(df.volume))
df.head()
| Unnamed: 0 | rawid | roi | volume | min | max | mean | std | type | level | id | icv | tbv | comp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | kirby127a_3_1_ax.img | Telencephalon_L | 531111 | 0.0 | 374.0 | 128.3013 | 51.8593 | 1 | 1 | 127 | 1378295 | 1268519 | 0.077066 |
| 1 | 2 | kirby127a_3_1_ax.img | Telencephalon_R | 543404 | 0.0 | 300.0 | 135.0683 | 53.6471 | 1 | 1 | 127 | 1378295 | 1268519 | 0.078850 |
| 2 | 3 | kirby127a_3_1_ax.img | Diencephalon_L | 9683 | 15.0 | 295.0 | 193.5488 | 32.2733 | 1 | 1 | 127 | 1378295 | 1268519 | 0.001405 |
| 3 | 4 | kirby127a_3_1_ax.img | Diencephalon_R | 9678 | 10.0 | 335.0 | 193.7051 | 32.7869 | 1 | 1 | 127 | 1378295 | 1268519 | 0.001404 |
| 4 | 5 | kirby127a_3_1_ax.img | Mesencephalon | 10268 | 55.0 | 307.0 | 230.8583 | 29.2249 | 1 | 1 | 127 | 1378295 | 1268519 | 0.001490 |
level1 = df.loc[(df.level == 1)]
group1= level1.groupby('roi')
group1.head(4)
| Unnamed: 0 | rawid | roi | volume | min | max | mean | std | type | level | id | icv | tbv | comp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | kirby127a_3_1_ax.img | Telencephalon_L | 531111 | 0.0 | 374.0 | 128.3013 | 51.8593 | 1 | 1 | 127 | 1378295 | 1268519 | 0.077066 |
| 1 | 2 | kirby127a_3_1_ax.img | Telencephalon_R | 543404 | 0.0 | 300.0 | 135.0683 | 53.6471 | 1 | 1 | 127 | 1378295 | 1268519 | 0.078850 |
| 2 | 3 | kirby127a_3_1_ax.img | Diencephalon_L | 9683 | 15.0 | 295.0 | 193.5488 | 32.2733 | 1 | 1 | 127 | 1378295 | 1268519 | 0.001405 |
| 3 | 4 | kirby127a_3_1_ax.img | Diencephalon_R | 9678 | 10.0 | 335.0 | 193.7051 | 32.7869 | 1 | 1 | 127 | 1378295 | 1268519 | 0.001404 |
| 4 | 5 | kirby127a_3_1_ax.img | Mesencephalon | 10268 | 55.0 | 307.0 | 230.8583 | 29.2249 | 1 | 1 | 127 | 1378295 | 1268519 | 0.001490 |
| 5 | 6 | kirby127a_3_1_ax.img | Metencephalon | 159402 | 2.0 | 299.0 | 138.5200 | 52.2241 | 1 | 1 | 127 | 1378295 | 1268519 | 0.023130 |
| 6 | 7 | kirby127a_3_1_ax.img | Myelencephalon | 4973 | 12.0 | 286.0 | 199.8497 | 36.6501 | 1 | 1 | 127 | 1378295 | 1268519 | 0.000722 |
| 7 | 8 | kirby127a_3_1_ax.img | CSF | 109776 | 0.0 | 258.0 | 33.0193 | 26.3262 | 1 | 1 | 127 | 1378295 | 1268519 | 0.015929 |
level2 = df.loc[(df.level == 2)]
group2= level2.groupby('roi')
group2.head(4)
| Unnamed: 0 | rawid | roi | volume | min | max | mean | std | type | level | id | icv | tbv | comp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 9 | kirby127a_3_1_ax.img | CerebralCortex_L | 276965 | 0.0 | 249.0 | 88.5014 | 27.7510 | 1 | 2 | 127 | 1378295 | 1268519 | 0.040188 |
| 9 | 10 | kirby127a_3_1_ax.img | CerebralCortex_R | 282858 | 0.0 | 295.0 | 93.7283 | 28.1726 | 1 | 2 | 127 | 1378295 | 1268519 | 0.041044 |
| 10 | 11 | kirby127a_3_1_ax.img | CerebralNucli_L | 12380 | 14.0 | 273.0 | 165.5364 | 28.7978 | 1 | 2 | 127 | 1378295 | 1268519 | 0.001796 |
| 11 | 12 | kirby127a_3_1_ax.img | CerebralNucli_R | 13073 | 16.0 | 273.0 | 168.7348 | 30.4016 | 1 | 2 | 127 | 1378295 | 1268519 | 0.001897 |
| 12 | 13 | kirby127a_3_1_ax.img | Thalamus_L | 6342 | 38.0 | 268.0 | 192.1482 | 24.4523 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000920 |
| 13 | 14 | kirby127a_3_1_ax.img | Thalamus_R | 6372 | 34.0 | 265.0 | 194.3232 | 27.0116 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000925 |
| 14 | 15 | kirby127a_3_1_ax.img | BasalForebrain_L | 3341 | 15.0 | 295.0 | 196.2076 | 43.2780 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000485 |
| 15 | 16 | kirby127a_3_1_ax.img | BasalForebrain_R | 3306 | 10.0 | 335.0 | 192.5140 | 41.6949 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000480 |
| 16 | 17 | kirby127a_3_1_ax.img | Mesencephalon_L | 4943 | 57.0 | 296.0 | 228.3875 | 27.4134 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000717 |
| 17 | 18 | kirby127a_3_1_ax.img | Mesencephalon_R | 5325 | 55.0 | 307.0 | 233.1521 | 30.6334 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000773 |
| 18 | 19 | kirby127a_3_1_ax.img | Metencephalon_R | 79910 | 2.0 | 299.0 | 144.9297 | 53.1512 | 1 | 2 | 127 | 1378295 | 1268519 | 0.011595 |
| 19 | 20 | kirby127a_3_1_ax.img | Metencephalon_L | 79493 | 3.0 | 275.0 | 132.0767 | 50.4612 | 1 | 2 | 127 | 1378295 | 1268519 | 0.011535 |
| 20 | 21 | kirby127a_3_1_ax.img | Myelencephalon_L | 2403 | 25.0 | 283.0 | 196.0509 | 37.1295 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000349 |
| 21 | 22 | kirby127a_3_1_ax.img | Myelencephalon_R | 2571 | 12.0 | 286.0 | 203.4001 | 35.8340 | 1 | 2 | 127 | 1378295 | 1268519 | 0.000373 |
| 22 | 23 | kirby127a_3_1_ax.img | WhiteMatter_L | 241767 | 2.0 | 374.0 | 171.9891 | 34.4828 | 1 | 2 | 127 | 1378295 | 1268519 | 0.035081 |
| 23 | 24 | kirby127a_3_1_ax.img | WhiteMatter_R | 247473 | 10.0 | 300.0 | 180.5410 | 35.8129 | 1 | 2 | 127 | 1378295 | 1268519 | 0.035909 |
| 24 | 25 | kirby127a_3_1_ax.img | Ventricle | 45791 | 0.0 | 258.0 | 37.3184 | 34.2745 | 1 | 2 | 127 | 1378295 | 1268519 | 0.006644 |
| 25 | 26 | kirby127a_3_1_ax.img | Sulcus_L | 33971 | 0.0 | 137.0 | 29.4653 | 17.5085 | 1 | 2 | 127 | 1378295 | 1268519 | 0.004929 |
| 26 | 27 | kirby127a_3_1_ax.img | Sulcus_R | 30015 | 0.0 | 161.0 | 30.4828 | 18.6214 | 1 | 2 | 127 | 1378295 | 1268519 | 0.004355 |
level3 = df.loc[(df.level == 3)]
group3= level3.groupby('roi').mean('volume')
group3.head(4)
| Unnamed: 0 | volume | min | max | mean | std | type | level | id | icv | tbv | comp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| roi | ||||||||||||
| AnteriorWM_L | 54.0 | 87012.0 | 9.0 | 374.0 | 172.2390 | 33.3689 | 1.0 | 3.0 | 127.0 | 1378295.0 | 1268519.0 | 0.012626 |
| AnteriorWM_R | 55.0 | 91755.0 | 11.0 | 278.0 | 172.5715 | 31.9999 | 1.0 | 3.0 | 127.0 | 1378295.0 | 1268519.0 | 0.013314 |
| BasalForebrain_L | 44.0 | 3341.0 | 15.0 | 295.0 | 196.2076 | 43.2780 | 1.0 | 3.0 | 127.0 | 1378295.0 | 1268519.0 | 0.000485 |
| BasalForebrain_R | 45.0 | 3306.0 | 10.0 | 335.0 | 192.5140 | 41.6949 | 1.0 | 3.0 | 127.0 | 1378295.0 | 1268519.0 | 0.000480 |
level4 = df.loc[(df.level == 4)]
group4= level4.groupby('roi').mean('volume')
group4.head(4)
| Unnamed: 0 | volume | min | max | mean | std | type | level | id | icv | tbv | comp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| roi | ||||||||||||
| AG_L | 100.0 | 7430.0 | 4.0 | 159.0 | 75.3894 | 21.2562 | 1.0 | 4.0 | 127.0 | 1378295.0 | 1268519.0 | 0.001078 |
| AG_R | 101.0 | 13580.0 | 1.0 | 173.0 | 83.2116 | 25.0679 | 1.0 | 4.0 | 127.0 | 1378295.0 | 1268519.0 | 0.001970 |
| ALIC_L | 162.0 | 2344.0 | 142.0 | 278.0 | 222.3963 | 21.9805 | 1.0 | 4.0 | 127.0 | 1378295.0 | 1268519.0 | 0.000340 |
| ALIC_R | 163.0 | 2528.0 | 131.0 | 263.0 | 222.2013 | 18.3836 | 1.0 | 4.0 | 127.0 | 1378295.0 | 1268519.0 | 0.000367 |
label = ["icv", "level 1", "level 2", "level 3", "level 4", "Diencephalon_L", "Diencephalon_R", "CerebralCortex_L", "CerebralCortex_R", "AnteriorWM_L", "AnteriorWM_R", "AG_L", "AG_R"]
source = [0, 0, 0, 0,
1, 1,
2, 2,
3, 3,
4, 4]
target = [ 1, 2, 3, 4,
5, 6,
7, 8,
9, 10,
11, 12]
value = [1378295, 1378295, 1378295, 1378295,
531111,543404,
276965, 282858,
87012,91755,
7430,13580]
link= dict(source = source, target = target, value= value)
node = dict(label = label, pad=15, thickness=5)
df = go.Sankey(link = link, node=node)
fig = go.Figure(df)
fig.show()
pyo.plot(fig, filename='figure1plot.html')
'figure1plot.html'
import sqlite3 as sq3
import pandas as pd
#Q3
con = sq3.connect("C:\sqlite3/opioid.db")
population_info = pd.read_sql_query("SELECT * from population", con)
con.close
population_info.head(4)
| ? | BUYER_COUNTY | BUYER_STATE | countyfips | STATE | COUNTY | county_name | NAME | variable | year | population | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTAUGA | AL | 01001 | 1 | 1 | Autauga | Autauga County, Alabama | B01003_001 | 2006 | 51328 |
| 1 | 2 | BALDWIN | AL | 01003 | 1 | 3 | Baldwin | Baldwin County, Alabama | B01003_001 | 2006 | 168121 |
| 2 | 3 | BARBOUR | AL | 01005 | 1 | 5 | Barbour | Barbour County, Alabama | B01003_001 | 2006 | 27861 |
| 3 | 4 | BIBB | AL | 01007 | 1 | 7 | Bibb | Bibb County, Alabama | B01003_001 | 2006 | 22099 |
con = sq3.connect("C:\sqlite3/opioid.db")
annual_info = pd.read_sql_query("SELECT * from annual", con)
con.close
annual_info.head
<bound method NDFrame.head of ? BUYER_COUNTY BUYER_STATE year count DOSAGE_UNIT countyfips 0 1 ABBEVILLE SC 2006 877 363620 45001 1 2 ABBEVILLE SC 2007 908 402940 45001 2 3 ABBEVILLE SC 2008 871 424590 45001 3 4 ABBEVILLE SC 2009 930 467230 45001 4 5 ABBEVILLE SC 2010 1197 539280 45001 ... ... ... ... ... ... ... ... 27753 27754 NA NV 2007 447 200600 NA 27754 27755 NA NV 2008 5 2200 NA 27755 27756 NA OH 2006 23 5100 NA 27756 27757 NA PR 2006 10 17800 NA 27757 27758 NA PR 2007 2 1300 NA [27758 rows x 7 columns]>
annual= annual_info.loc[(annual_info.countyfips == 'NA')]
annual.head()
| ? | BUYER_COUNTY | BUYER_STATE | year | count | DOSAGE_UNIT | countyfips | |
|---|---|---|---|---|---|---|---|
| 187 | 188 | ADJUNTAS | PR | 2006 | 147 | 102800 | NA |
| 188 | 189 | ADJUNTAS | PR | 2007 | 153 | 104800 | NA |
| 189 | 190 | ADJUNTAS | PR | 2008 | 153 | 45400 | NA |
| 190 | 191 | ADJUNTAS | PR | 2009 | 184 | 54200 | NA |
| 191 | 192 | ADJUNTAS | PR | 2010 | 190 | 56200 | NA |
annual= annual.loc[(annual.BUYER_STATE != "PR") & (annual.countyfips == 'NA')]
annual.head()
| ? | BUYER_COUNTY | BUYER_STATE | year | count | DOSAGE_UNIT | countyfips | |
|---|---|---|---|---|---|---|---|
| 10071 | 10072 | GUAM | GU | 2006 | 319 | 265348 | NA |
| 10072 | 10073 | GUAM | GU | 2007 | 330 | 275600 | NA |
| 10073 | 10074 | GUAM | GU | 2008 | 313 | 286900 | NA |
| 10074 | 10075 | GUAM | GU | 2009 | 390 | 355300 | NA |
| 10075 | 10076 | GUAM | GU | 2010 | 510 | 413800 | NA |
annual = annual.loc[(annual.BUYER_STATE == "AR") & (annual.BUYER_COUNTY == "MONTGOMERY")].replace({"NA":"5097"})
annual.head()
| ? | BUYER_COUNTY | BUYER_STATE | year | count | DOSAGE_UNIT | countyfips | |
|---|---|---|---|---|---|---|---|
| 17429 | 17430 | MONTGOMERY | AR | 2006 | 469 | 175390 | 5097 |
| 17430 | 17431 | MONTGOMERY | AR | 2007 | 597 | 241270 | 5097 |
| 17431 | 17432 | MONTGOMERY | AR | 2008 | 561 | 251760 | 5097 |
| 17432 | 17433 | MONTGOMERY | AR | 2009 | 554 | 244160 | 5097 |
| 17433 | 17434 | MONTGOMERY | AR | 2010 | 449 | 247990 | 5097 |
annual_info.loc[(annual_info['BUYER_STATE'] == 'AR') & (annual_info['BUYER_COUNTY'] == 'MONTGOMERY'), 'countyfips'] = '05097'
annual_info.head()
| ? | BUYER_COUNTY | BUYER_STATE | year | count | DOSAGE_UNIT | countyfips | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | ABBEVILLE | SC | 2006 | 877 | 363620 | 45001 |
| 1 | 2 | ABBEVILLE | SC | 2007 | 908 | 402940 | 45001 |
| 2 | 3 | ABBEVILLE | SC | 2008 | 871 | 424590 | 45001 |
| 3 | 4 | ABBEVILLE | SC | 2009 | 930 | 467230 | 45001 |
| 4 | 5 | ABBEVILLE | SC | 2010 | 1197 | 539280 | 45001 |
annual_info= annual_info.loc[annual_info['BUYER_COUNTY'] != "NA"]
annual_info.head()
| ? | BUYER_COUNTY | BUYER_STATE | year | count | DOSAGE_UNIT | countyfips | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | ABBEVILLE | SC | 2006 | 877 | 363620 | 45001 |
| 1 | 2 | ABBEVILLE | SC | 2007 | 908 | 402940 | 45001 |
| 2 | 3 | ABBEVILLE | SC | 2008 | 871 | 424590 | 45001 |
| 3 | 4 | ABBEVILLE | SC | 2009 | 930 | 467230 | 45001 |
| 4 | 5 | ABBEVILLE | SC | 2010 | 1197 | 539280 | 45001 |
con = sq3.connect("C:\sqlite3/opioid.db")
land_info = pd.read_sql_query("SELECT * from land", con)
con.close
land_info.head()
| ? | Areaname | STCOU | LND010190F | LND010190D | LND010190N1 | LND010190N2 | LND010200F | LND010200D | LND010200N1 | ... | LND110210N1 | LND110210N2 | LND210190F | LND210190D | LND210190N1 | LND210190N2 | LND210200F | LND210200D | LND210200N1 | LND210200N2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | UNITED STATES | 00000 | 0 | 3787425.08 | 0000 | 0000 | 0 | 3794083.06 | 0000 | ... | 0000 | 0000 | 0 | 251083.35 | 0000 | 0000 | 0 | 256644.62 | 0000 | 0000 |
| 1 | 2 | ALABAMA | 01000 | 0 | 52422.94 | 0000 | 0000 | 0 | 52419.02 | 0000 | ... | 0000 | 0000 | 0 | 1672.71 | 0000 | 0000 | 0 | 1675.01 | 0000 | 0000 |
| 2 | 3 | Autauga, AL | 01001 | 0 | 604.49 | 0000 | 0000 | 0 | 604.45 | 0000 | ... | 0000 | 0000 | 0 | 8.48 | 0000 | 0000 | 0 | 8.48 | 0000 | 0000 |
| 3 | 4 | Baldwin, AL | 01003 | 0 | 2027.08 | 0000 | 0000 | 0 | 2026.93 | 0000 | ... | 0000 | 0000 | 0 | 430.55 | 0000 | 0000 | 0 | 430.58 | 0000 | 0000 |
| 4 | 5 | Barbour, AL | 01005 | 0 | 904.59 | 0000 | 0000 | 0 | 904.52 | 0000 | ... | 0000 | 0000 | 0 | 19.59 | 0000 | 0000 | 0 | 19.61 | 0000 | 0000 |
5 rows × 35 columns
land = land_info.loc[:,['Areaname', 'STCOU', 'LND110210D']]
land.head(10)
| Areaname | STCOU | LND110210D | |
|---|---|---|---|
| 0 | UNITED STATES | 00000 | 3531905.43 |
| 1 | ALABAMA | 01000 | 50645.33 |
| 2 | Autauga, AL | 01001 | 594.44 |
| 3 | Baldwin, AL | 01003 | 1589.78 |
| 4 | Barbour, AL | 01005 | 884.88 |
| 5 | Bibb, AL | 01007 | 622.58 |
| 6 | Blount, AL | 01009 | 644.78 |
| 7 | Bullock, AL | 01011 | 622.81 |
| 8 | Butler, AL | 01013 | 776.83 |
| 9 | Calhoun, AL | 01015 | 605.87 |
land= land.rename(columns = {'STCOU':'countyfips'})
land.head(10)
| Areaname | countyfips | LND110210D | |
|---|---|---|---|
| 0 | UNITED STATES | 00000 | 3531905.43 |
| 1 | ALABAMA | 01000 | 50645.33 |
| 2 | Autauga, AL | 01001 | 594.44 |
| 3 | Baldwin, AL | 01003 | 1589.78 |
| 4 | Barbour, AL | 01005 | 884.88 |
| 5 | Bibb, AL | 01007 | 622.58 |
| 6 | Blount, AL | 01009 | 644.78 |
| 7 | Bullock, AL | 01011 | 622.81 |
| 8 | Butler, AL | 01013 | 776.83 |
| 9 | Calhoun, AL | 01015 | 605.87 |
population_info.merge(land, on='countyfips', how='left')
| ? | BUYER_COUNTY | BUYER_STATE | countyfips | STATE | COUNTY | county_name | NAME | variable | year | population | Areaname | LND110210D | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | AUTAUGA | AL | 01001 | 1 | 1 | Autauga | Autauga County, Alabama | B01003_001 | 2006 | 51328 | Autauga, AL | 594.44 |
| 1 | 2 | BALDWIN | AL | 01003 | 1 | 3 | Baldwin | Baldwin County, Alabama | B01003_001 | 2006 | 168121 | Baldwin, AL | 1589.78 |
| 2 | 3 | BARBOUR | AL | 01005 | 1 | 5 | Barbour | Barbour County, Alabama | B01003_001 | 2006 | 27861 | Barbour, AL | 884.88 |
| 3 | 4 | BIBB | AL | 01007 | 1 | 7 | Bibb | Bibb County, Alabama | B01003_001 | 2006 | 22099 | Bibb, AL | 622.58 |
| 4 | 5 | BLOUNT | AL | 01009 | 1 | 9 | Blount | Blount County, Alabama | B01003_001 | 2006 | 55485 | Blount, AL | 644.78 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 28260 | 28261 | WASHAKIE | WY | 56043 | 56 | 43 | Washakie | Washakie County, Wyoming | B01003_001 | 2014 | 8444 | Washakie, WY | 2238.55 |
| 28261 | 28262 | WESTON | WY | 56045 | 56 | 45 | Weston | Weston County, Wyoming | B01003_001 | 2014 | 7135 | Weston, WY | 2398.09 |
| 28262 | 28263 | SKAGWAY | AK | 02230 | 2 | 230 | Skagway | Skagway Municipality, Alaska | B01003_001 | 2014 | 996 | Skagway, AK | 452.33 |
| 28263 | 28264 | HOONAH ANGOON | AK | 02105 | 2 | 105 | Hoonah Angoon | Hoonah-Angoon Census Area, Alaska | B01003_001 | 2014 | 2126 | Hoonah-Angoon, AK | 7524.92 |
| 28264 | 28265 | PETERSBURG | AK | 02195 | 2 | 195 | Petersburg | Petersburg Borough, Alaska | B01003_001 | 2014 | 3212 | Petersburg, AK | 3281.98 |
28265 rows × 13 columns
land_info.shape
(3198, 35)
land.shape
(3198, 3)
population_info.shape
(28265, 11)
annual_info.shape
(27741, 7)
#Q4
summary = annual_info[['countyfips', 'year', 'DOSAGE_UNIT']]
summary
| countyfips | year | DOSAGE_UNIT | |
|---|---|---|---|
| 0 | 45001 | 2006 | 363620 |
| 1 | 45001 | 2007 | 402940 |
| 2 | 45001 | 2008 | 424590 |
| 3 | 45001 | 2009 | 467230 |
| 4 | 45001 | 2010 | 539280 |
| ... | ... | ... | ... |
| 27736 | 48507 | 2010 | 200100 |
| 27737 | 48507 | 2011 | 244800 |
| 27738 | 48507 | 2012 | 263700 |
| 27739 | 48507 | 2013 | 186700 |
| 27740 | 48507 | 2014 | 148930 |
27741 rows × 3 columns
summary_avg = summary.groupby(['year', 'countyfips'])['DOSAGE_UNIT'].mean().reset_index()
summary_avg.head()
| year | countyfips | DOSAGE_UNIT | |
|---|---|---|---|
| 0 | 2006 | 01001 | 2277140.0 |
| 1 | 2006 | 01003 | 6353798.0 |
| 2 | 2006 | 01005 | 827060.0 |
| 3 | 2006 | 01007 | 754210.0 |
| 4 | 2006 | 01009 | 1290295.0 |
fig2 = px.scatter(summary_avg, x = "year", y = "DOSAGE_UNIT", color = "countyfips", title='Average count of opioid pills per year in millions')
figure2 = go.Figure(fig2)
figure2.show()
pyo.plot(figure2, filename='figure2plot.html')
'figure2plot.html'
##Q2 & Q4 link for public html with graphs
https://azeher1.github.io/